import glob
import numpy as np
import os
import shutil
from utils import log_progress
import glob
import numpy as np
import os
import shutil
from utils import log_progress
import tensorflow as tf
# TensorFlow 1.x-style session configuration:
#   * allow_soft_placement - let TensorFlow place an op on another device
#     when the requested device cannot run it;
#   * gpu_options.allow_growth - allocate GPU memory on demand instead of
#     reserving it all up front.
config = tf.compat.v1.ConfigProto(
    allow_soft_placement=True,
    gpu_options=tf.compat.v1.GPUOptions(allow_growth=True),
)
sess = tf.compat.v1.Session(config=config)

def _draw_sample(pool, size):
    """Randomly pick `size` distinct paths from `pool` without replacement.

    Returns a tuple ``(sample, remaining)``: `sample` is the NumPy array
    produced by ``np.random.choice`` and `remaining` is a sorted list of the
    paths in `pool` that were not picked.  The sort matters: the iteration
    order of a set of strings is not stable between interpreter runs, so an
    unsorted pool would make the next draw differ even with a fixed seed.
    """
    sample = np.random.choice(pool, size=size, replace=False)
    remaining = sorted(set(pool) - set(sample.tolist()))
    return sample, remaining


# Seed NumPy's global RNG so the random split below can be reproduced.
np.random.seed(42)

# glob gives no ordering guarantee, so sort before sampling.
files = sorted(glob.glob('F:\\dogsvscats\\train\\*'))

# Entries look like 'F:\\dogsvscats\\train\\cat.0.jpg' (the original note
# records 25000 files).  Classify on the file name only: the parent directory
# 'dogsvscats' contains both 'dog' and 'cat', so a substring test on the full
# path would put every file into both classes.
cat_files = [fn for fn in files if os.path.basename(fn).startswith('cat')]
dog_files = [fn for fn in files if os.path.basename(fn).startswith('dog')]

# 1500 training images per class; the picked files leave the pools.
cat_train, cat_files = _draw_sample(cat_files, 1500)
dog_train, dog_files = _draw_sample(dog_files, 1500)

# 500 validation images per class, again removed from the pools.
cat_val, cat_files = _draw_sample(cat_files, 500)
dog_val, dog_files = _draw_sample(dog_files, 500)

# 500 test images per class, drawn from what is left.
cat_test = np.random.choice(cat_files, size=500, replace=False)
dog_test = np.random.choice(dog_files, size=500, replace=False)

print('Cat datasets:', cat_train.shape, cat_val.shape, cat_test.shape)
print('Dog datasets:', dog_train.shape, dog_val.shape, dog_test.shape)

# Destination directory of each split.
train_dir = 'F:\\dogsvscats\\training_data'
val_dir = 'F:\\dogsvscats\\validation_data'
test_dir = 'F:\\dogsvscats\\test_data'

# Join the cat and dog selections of each split into one array of paths
# (1500 + 1500 = 3000 entries for the training split).
train_files = np.concatenate([cat_train, dog_train])
validate_files = np.concatenate([cat_val, dog_val])
test_files = np.concatenate([cat_test, dog_test])

# Create the split directories.  exist_ok=True makes this a no-op when a
# directory is already there, without a separate existence check first.
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

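# Copy the files selected for each split into that split's directory
# (the copy loops below are currently commented out).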
# for fn in log_progress(train_files, name='Training Images'):
#     shutil.copy(fn, train_dir)
#
# for fn in log_progress(validate_files, name='Validation Images'):
#     shutil.copy(fn, val_dir)
#
# for fn in log_progress(test_files, name='Test Images'):
#     shutil.copy(fn, test_dir)
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img

IMG_DIM = (150, 150)  # target size every image is loaded at


def _load_split(pattern):
    """Load every image matching `pattern` and derive its class label.

    Returns ``(paths, images, labels)``: `paths` is the list returned by
    ``glob.glob``, `images` is a NumPy array stacking the ``img_to_array``
    result of each file loaded with ``target_size=IMG_DIM``, and `labels`
    holds the part of each file name before its first dot (for example
    ``'cat'`` for ``cat.0.jpg``).
    """
    paths = glob.glob(pattern)
    images = np.array(
        [img_to_array(load_img(path, target_size=IMG_DIM)) for path in paths]
    )
    # Read the label from the file name itself instead of a fixed position
    # in the split path, so it does not depend on the directory depth.
    labels = [os.path.basename(path).split('.')[0].strip() for path in paths]
    return paths, images, labels


train_files, train_imgs, train_labels = _load_split(
    'F:\\dogsvscats\\training_data\\*')
validation_files, validation_imgs, validation_labels = _load_split(
    'F:\\dogsvscats\\validation_data\\*')

print('Train dataset shape:', train_imgs.shape,
      '\tValidation dataset shape:', validation_imgs.shape)

# Work on float32 copies and divide by 255 in place (normalisation); the
# unscaled arrays above are left untouched.
train_imgs_scaled = train_imgs.astype('float32')
validation_imgs_scaled = validation_imgs.astype('float32')
train_imgs_scaled /= 255
validation_imgs_scaled /= 255

print(train_imgs[0].shape)

from sklearn.preprocessing import LabelEncoder

# Training hyper-parameters and the per-image input shape.
input_shape = (150, 150, 3)
num_classes = 2
batch_size = 30
epochs = 30

# Encode the text class labels as integer ids.  The encoder is fitted on the
# training labels only and then applied to both splits.
le = LabelEncoder().fit(train_labels)
train_labels_enc = le.transform(train_labels)
validation_labels_enc = le.transform(validation_labels)

# Show a slice of the text labels next to their encoded values.
print(train_labels[1495:1505], train_labels_enc[1495:1505])
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.models import Sequential
from keras import optimizers

# Plain CNN: three 3x3 convolution + 2x2 max-pooling stages (16, 64 and 128
# filters), then a 512-unit dense layer and a single sigmoid output unit.
model = Sequential([
    Conv2D(16, kernel_size=(3, 3), activation='relu',
           input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Compile settings: RMSprop with its default arguments, binary cross-entropy
# loss, accuracy as the reported metric.
model.compile(
    optimizer=optimizers.RMSprop(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the per-layer output shapes and parameter counts.
model.summary()

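# Requires graphviz to be installed
#from IPython.display import SVG
#from keras.utils.vis_utils import model_to_dot
#SVG(model_to_dot(model, show_shapes=True,
#                 show_layer_names=True, rankdir='TB').create(prog='dot', format='svg'))
# Train the cats-vs-dogs classifier (the original note here said
# "handwritten digit recognition", which does not match this script).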

# Fit on the scaled training images for `epochs` epochs in mini-batches of
# `batch_size`, reporting loss/accuracy on the validation data as well.
history = model.fit(
    x=train_imgs_scaled,
    y=train_labels_enc,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(validation_imgs_scaled, validation_labels_enc),
)
# Recorded result:
# loss: 0.0375 - accuracy: 0.9941 - val_loss: 4.7069 - val_accuracy: 0.7362
# Accuracy on the training set is about 99% but only about 73% on the
# validation set, so the model is overfitting.